library(foreign)
library(Zelig)
library(MatchIt)
library(cem)


# Cutpoints handed to CEM for coarsening the matching covariates.
exclgrps.cut <- c(0, 1, 2, 5)
b.cut <- c(0, 0.25, 0.5, 0.75)
zone.cut <- c(0, 1, 2, 3)

# Read the group-level data and drop the column named "X"
# (presumably a row index written out with the CSV -- confirm).
grpdata <- read.csv("grpdataset.csv")
grpdata$X <- NULL

# Binary explanatory variables: 1 when the threshold is exceeded, 0 otherwise.
grpdata$seg.card <- ifelse(grpdata$seg > 0.8, 1, 0)
grpdata$shareincl.card <- ifelse(1 - grpdata$share.included > 0.5, 1, 0)

# Group level, full sample of excluded groups: effect of ethnic homogeneity.
# Treatment is the dichotomous seg indicator; only b and exclgrps are
# controlled for. Each spec: CEM match -> imbalance check -> relogit fit ->
# standard errors from the diagonal of the coefficient covariance matrix.
grpdata.f1.m1 <- grpdata[, c("seg.card", "exclgrps", "rebel", "b")]
match.f1.m1 <- match.data(
  matchit(
    seg.card ~ b + exclgrps,
    data = na.omit(grpdata.f1.m1),
    method = "cem",
    drop = "rebel",
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut)
  )
)
imbalance(
  group = match.f1.m1$seg.card,
  data = match.f1.m1,
  drop = c("rebel", "seg.card", "distance", "weights", "subclass")
)
z.f1.m1 <- zelig(
  rebel ~ seg.card + b + exclgrps,
  data = match.f1.m1,
  model = "relogit"
)
z.f1.m1
sqrt(diag(abs(vcov(z.f1.m1))))

# Same specification with the dichotomised share_included as treatment.
grpdata.f1.m2 <- grpdata[, c("shareincl.card", "exclgrps", "rebel", "b")]
match.f1.m2 <- match.data(
  matchit(
    shareincl.card ~ b + exclgrps,
    data = na.omit(grpdata.f1.m2),
    method = "cem",
    drop = "rebel",
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut)
  )
)
imbalance(
  group = match.f1.m2$shareincl.card,
  data = match.f1.m2,
  drop = c("rebel", "shareincl.card", "distance", "weights", "subclass")
)
z.f1.m2 <- zelig(
  rebel ~ shareincl.card + b + exclgrps,
  data = match.f1.m2,
  model = "relogit"
)
z.f1.m2
sqrt(diag(abs(vcov(z.f1.m2))))

# Controls: b, exclgrps and the lagged regime/conflict variables.
# Caveat from the authors: the lags are probably post-treatment, because
# ethnic homogeneity is relatively constant over time.
grpdata.f2.m1 <- grpdata[, c(
  "seg.card", "exclgrps", "rebel", "b",
  "lag.democ", "lag.anoc", "lag.conflict"
)]
match.f2.m1 <- match.data(
  matchit(
    seg.card ~ b + exclgrps + lag.democ + lag.anoc + lag.conflict,
    data = na.omit(grpdata.f2.m1),
    method = "cem",
    drop = "rebel",
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut)
  )
)
imbalance(
  group = match.f2.m1$seg.card,
  data = match.f2.m1,
  drop = c("rebel", "seg.card", "distance", "weights", "subclass")
)
z.f2.m1 <- zelig(
  rebel ~ seg.card + b + exclgrps + lag.democ + lag.anoc + lag.conflict,
  data = match.f2.m1,
  model = "relogit"
)
z.f2.m1
sqrt(diag(abs(vcov(z.f2.m1))))

# Same controls, with the dichotomised share_included as treatment.
grpdata.f2.m2 <- grpdata[, c(
  "shareincl.card", "exclgrps", "rebel", "b",
  "lag.democ", "lag.anoc", "lag.conflict"
)]
match.f2.m2 <- match.data(
  matchit(
    shareincl.card ~ b + exclgrps + lag.democ + lag.anoc + lag.conflict,
    data = na.omit(grpdata.f2.m2),
    method = "cem",
    drop = "rebel",
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut)
  )
)
imbalance(
  group = match.f2.m2$shareincl.card,
  data = match.f2.m2,
  drop = c("rebel", "shareincl.card", "distance", "weights", "subclass")
)
z.f2.m2 <- zelig(
  rebel ~ shareincl.card + b + exclgrps + lag.democ + lag.anoc + lag.conflict,
  data = match.f2.m2,
  model = "relogit"
)
z.f2.m2
sqrt(diag(abs(vcov(z.f2.m2))))

# Controls: b, exclgrps and the region dummies.
grpdata.f3.m1 <- grpdata[, c(
  "seg.card", "exclgrps", "rebel", "b",
  "eeurop", "lamerica", "ssafrica", "asia"
)]
match.f3.m1 <- match.data(
  matchit(
    seg.card ~ b + exclgrps + eeurop + lamerica + ssafrica + asia,
    data = na.omit(grpdata.f3.m1),
    method = "cem",
    drop = "rebel",
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut)
  )
)
imbalance(
  group = match.f3.m1$seg.card,
  data = match.f3.m1,
  drop = c("rebel", "seg.card", "distance", "weights", "subclass")
)
# NOTE(review): this outcome model leaves out the region dummies that the
# share_included model below includes -- confirm this is intentional.
z.f3.m1 <- zelig(
  rebel ~ seg.card + b + exclgrps,
  data = match.f3.m1,
  model = "relogit"
)
sqrt(diag(abs(vcov(z.f3.m1))))

# Same matching covariates, with the dichotomised share_included as treatment.
grpdata.f3.m2 <- grpdata[, c(
  "shareincl.card", "exclgrps", "rebel", "b",
  "eeurop", "lamerica", "ssafrica", "asia"
)]
match.f3.m2 <- match.data(
  matchit(
    shareincl.card ~ b + exclgrps + eeurop + lamerica + ssafrica + asia,
    data = na.omit(grpdata.f3.m2),
    method = "cem",
    drop = "rebel",
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut)
  )
)
imbalance(
  group = match.f3.m2$shareincl.card,
  data = match.f3.m2,
  drop = c("rebel", "shareincl.card", "distance", "weights", "subclass")
)
z.f3.m2 <- zelig(
  rebel ~ shareincl.card + b + exclgrps + eeurop + lamerica + ssafrica + asia,
  data = match.f3.m2,
  model = "relogit"
)
sqrt(diag(abs(vcov(z.f3.m2))))

# Controls: b, exclgrps, region dummies and GDP (lgdpcapl).
# Caveat from the authors: GDP might be a collider or post-treatment.
grpdata.f4.m1 <- grpdata[, c(
  "seg.card", "exclgrps", "rebel", "b", "lgdpcapl",
  "eeurop", "lamerica", "ssafrica", "asia"
)]
match.f4.m1 <- match.data(
  matchit(
    seg.card ~ b + exclgrps + lgdpcapl + eeurop + lamerica + ssafrica + asia,
    data = na.omit(grpdata.f4.m1),
    method = "cem",
    drop = "rebel",
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut)
  )
)
imbalance(
  group = match.f4.m1$seg.card,
  data = match.f4.m1,
  drop = c("rebel", "seg.card", "distance", "weights", "subclass")
)
# The outcome model uses only the treatment, b and exclgrps.
z.f4.m1 <- zelig(
  rebel ~ seg.card + b + exclgrps,
  data = match.f4.m1,
  model = "relogit"
)
sqrt(diag(abs(vcov(z.f4.m1))))

# Same matching covariates, with the dichotomised share_included as treatment.
grpdata.f4.m2 <- grpdata[, c(
  "shareincl.card", "exclgrps", "rebel", "b", "lgdpcapl",
  "eeurop", "lamerica", "ssafrica", "asia"
)]
match.f4.m2 <- match.data(
  matchit(
    shareincl.card ~ b + exclgrps + lgdpcapl + eeurop + lamerica + ssafrica + asia,
    data = na.omit(grpdata.f4.m2),
    method = "cem",
    drop = "rebel",
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut)
  )
)
imbalance(
  group = match.f4.m2$shareincl.card,
  data = match.f4.m2,
  drop = c("rebel", "shareincl.card", "distance", "weights", "subclass")
)
# The outcome model adds the region dummies but not lgdpcapl.
z.f4.m2 <- zelig(
  rebel ~ shareincl.card + b + exclgrps + eeurop + lamerica + ssafrica + asia,
  data = match.f4.m2,
  model = "relogit"
)
z.f4.m2
sqrt(diag(abs(vcov(z.f4.m2))))

# Controls: b, exclgrps, region dummies and regime type (anocl, democl).
grpdata.f5.m1 <- grpdata[, c(
  "seg.card", "exclgrps", "rebel", "b", "anocl", "democl",
  "eeurop", "lamerica", "ssafrica", "asia"
)]
match.f5.m1 <- match.data(
  matchit(
    seg.card ~ b + exclgrps + anocl + democl + eeurop + lamerica + ssafrica + asia,
    data = na.omit(grpdata.f5.m1),
    method = "cem",
    drop = "rebel",
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut)
  )
)
imbalance(
  group = match.f5.m1$seg.card,
  data = match.f5.m1,
  drop = c("rebel", "seg.card", "distance", "weights", "subclass")
)
# The outcome model uses only the treatment, b and exclgrps.
z.f5.m1 <- zelig(
  rebel ~ seg.card + b + exclgrps,
  data = match.f5.m1,
  model = "relogit"
)
z.f5.m1
sqrt(diag(abs(vcov(z.f5.m1))))

# Same matching covariates, with the dichotomised share_included as treatment.
grpdata.f5.m2 <- grpdata[, c(
  "shareincl.card", "exclgrps", "rebel", "b", "anocl", "democl",
  "eeurop", "lamerica", "ssafrica", "asia"
)]
match.f5.m2 <- match.data(
  matchit(
    shareincl.card ~ b + exclgrps + anocl + democl + eeurop + lamerica + ssafrica + asia,
    data = na.omit(grpdata.f5.m2),
    method = "cem",
    drop = "rebel",
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut)
  )
)
imbalance(
  group = match.f5.m2$shareincl.card,
  data = match.f5.m2,
  drop = c("rebel", "shareincl.card", "distance", "weights", "subclass")
)
# The outcome model adds the region dummies but not the regime-type variables.
z.f5.m2 <- zelig(
  rebel ~ shareincl.card + b + exclgrps + eeurop + lamerica + ssafrica + asia,
  data = match.f5.m2,
  model = "relogit"
)
z.f5.m2
sqrt(diag(abs(vcov(z.f5.m2))))

# Controls: b, exclgrps and distance (avg.zone), with dichotomous seg as
# treatment. avg.zone is coarsened with zone.cut for the CEM match.
grpdata.f6.m1 <- grpdata[, c("seg.card", "exclgrps", "rebel", "b", "avg.zone")]
match.f6.m1 <- match.data(
  matchit(
    seg.card ~ b + exclgrps + avg.zone,
    data = na.omit(grpdata.f6.m1),
    method = "cem",
    drop = c("rebel"),
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut, avg.zone = zone.cut)
  )
)
imbalance(
  group = match.f6.m1$seg.card,
  data = match.f6.m1,
  drop = c("rebel", "seg.card", "distance", "weights", "subclass")
)

# Fit on the sample matched just above. The previous code passed match.f1.m1,
# which was built without avg.zone and belongs to a different specification.
z.f6.m1 <- zelig(
  rebel ~ seg.card + b + exclgrps + avg.zone,
  data = match.f6.m1,
  model = "relogit"
)
z.f6.m1
# Standard errors from the diagonal of the coefficient covariance matrix.
sqrt(diag(abs(vcov(z.f6.m1))))

# Same specification with the dichotomised share_included as treatment.
grpdata.f6.m2 <- grpdata[, c("shareincl.card", "exclgrps", "rebel", "b", "avg.zone")]
match.f6.m2 <- match.data(
  matchit(
    shareincl.card ~ b + exclgrps + avg.zone,
    data = na.omit(grpdata.f6.m2),
    method = "cem",
    drop = c("rebel"),
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut, avg.zone = zone.cut)
  )
)
imbalance(
  group = match.f6.m2$shareincl.card,
  data = match.f6.m2,
  drop = c("rebel", "shareincl.card", "distance", "weights", "subclass")
)
z.f6.m2 <- zelig(
  rebel ~ shareincl.card + b + exclgrps + avg.zone,
  data = match.f6.m2,
  model = "relogit"
)
z.f6.m2
sqrt(diag(abs(vcov(z.f6.m2))))

# Controls: b, exclgrps, distance (avg.zone) and terrain (avg.mnt), with
# dichotomous seg as treatment.
grpdata.f7.m1 <- grpdata[, c("seg.card", "exclgrps", "rebel", "b", "avg.zone", "avg.mnt")]
# avg.mnt is now part of the matching formula so that this spec matches on
# the same covariates as the share_included spec below; it was previously
# left out, so the match ignored terrain.
match.f7.m1 <- match.data(
  matchit(
    seg.card ~ b + exclgrps + avg.zone + avg.mnt,
    data = na.omit(grpdata.f7.m1),
    method = "cem",
    drop = c("rebel"),
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut, avg.zone = zone.cut)
  )
)
imbalance(
  group = match.f7.m1$seg.card,
  data = match.f7.m1,
  drop = c("rebel", "seg.card", "distance", "weights", "subclass")
)
#compare to cem cem(treatment="seg.card",data=grpdata.f1.m1,drop=c('rebel'),cutpoints=list(exclgrps=exclgrps.cut,b=b.cut))
# Fit on the sample matched just above. The previous code passed match.f1.m1,
# which contains neither avg.zone nor avg.mnt.
z.f7.m1 <- zelig(
  rebel ~ seg.card + b + exclgrps + avg.zone + avg.mnt,
  data = match.f7.m1,
  model = "relogit"
)
z.f7.m1
# Standard errors from the diagonal of the coefficient covariance matrix.
sqrt(diag(abs(vcov(z.f7.m1))))


# Same specification with the dichotomised share_included as treatment.
grpdata.f7.m2 <- grpdata[, c("shareincl.card", "exclgrps", "rebel", "b", "avg.zone", "avg.mnt")]
match.f7.m2 <- match.data(
  matchit(
    shareincl.card ~ b + exclgrps + avg.zone + avg.mnt,
    data = na.omit(grpdata.f7.m2),
    method = "cem",
    drop = c("rebel"),
    cutpoints = list(exclgrps = exclgrps.cut, b = b.cut, avg.zone = zone.cut)
  )
)
imbalance(
  group = match.f7.m2$shareincl.card,
  data = match.f7.m2,
  drop = c("rebel", "shareincl.card", "distance", "weights", "subclass")
)
z.f7.m2 <- zelig(
  rebel ~ shareincl.card + b + exclgrps + avg.zone + avg.mnt,
  data = match.f7.m2,
  model = "relogit"
)
z.f7.m2
sqrt(diag(abs(vcov(z.f7.m2))))


# ______________________________________________________________________________________________________________________________________
# (The separator above is a comment: a bare run of underscores is not valid R
# and prevents the file from being parsed.)

# Estimations country level

ctrydata <- read.csv("ctrydataset.csv")
# Making binary outcome variables: 1 when the count is positive, 0 otherwise.
ctrydata$rebel.all <- ifelse(ctrydata$rebel.all > 0, 1, 0) # any rebellion
# NOTE(review): the original comments on the next two lines appeared swapped;
# going by the variable names, rebel.incl is presumably rebellion by included
# groups and rebel.excl rebellion by excluded groups -- confirm with codebook.
ctrydata$rebel.incl <- ifelse(ctrydata$rebel.incl > 0, 1, 0) # rebellion by included
ctrydata$rebel.excl <- ifelse(ctrydata$rebel.excl > 0, 1, 0) # rebellion by excluded
ctrydata$X <- NULL

# Second country-level dataset, prepared the same way.
ctrydata2 <- read.csv("ctrydataset2.csv")
ctrydata2$X <- NULL
ctrydata2$rebel.all <- ifelse(ctrydata2$rebel.all > 0, 1, 0)
ctrydata2$rebel.incl <- ifelse(ctrydata2$rebel.incl > 0, 1, 0)
ctrydata2$rebel.excl <- ifelse(ctrydata2$rebel.excl > 0, 1, 0)

# First looking at the effect of the degree of exclusion in countries that exclude

# Restrict to countries with share.excl > 0; outcome is any rebellion.
ctrydata.f1 <- ctrydata[ctrydata$share.excl > 0, c("rebel.all", "anocl", "democl")]
# Making binary explanatory variable: 1 when share.discrim or share.pwless
# exceeds 0.5 (subset with the same row mask as ctrydata.f1).
ctrydata.f1$card.polineq <- ifelse(
  ctrydata$share.discrim[ctrydata$share.excl > 0] > 0.5 |
    ctrydata$share.pwless[ctrydata$share.excl > 0] > 0.5,
  1, 0
)
match.f1 <- match.data(
  matchit(
    card.polineq ~ anocl + democl,
    data = na.omit(ctrydata.f1),
    method = "cem",
    drop = c("rebel.all")
  )
)
# The group vector must come from the matched data being checked (match.f1);
# the previous code took it from match.f1.m1, a group-level dataset that has
# no card.polineq column.
imbalance(
  group = match.f1$card.polineq,
  data = match.f1,
  drop = c("rebel.all", "card.polineq", "distance", "weights", "subclass")
)
z.f1 <- zelig(rebel.all ~ card.polineq + anocl + democl, data = match.f1, model = "relogit")
z.f1
# Standard errors from the diagonal of the coefficient covariance matrix.
sqrt(diag(abs(vcov(z.f1))))

# Same treatment definition as above, with rebel.excl as the outcome.
ctrydata.f2 <- ctrydata[ctrydata$share.excl > 0, c("rebel.excl", "anocl", "democl")]
# Binary explanatory variable: 1 when share.discrim or share.pwless exceeds 0.5.
ctrydata.f2$card.polineq <- ifelse(
  ctrydata$share.discrim[ctrydata$share.excl > 0] > 0.5 |
    ctrydata$share.pwless[ctrydata$share.excl > 0] > 0.5,
  1,
  0
)
match.f2 <- match.data(
  matchit(
    card.polineq ~ anocl + democl,
    data = na.omit(ctrydata.f2),
    method = "cem",
    drop = "rebel.excl"
  )
)
z.f2 <- zelig(
  rebel.excl ~ card.polineq + anocl + democl,
  data = match.f2,
  model = "relogit"
)
z.f2
sqrt(diag(abs(vcov(z.f2))))

# Countries with share.excl > 0; outcome rebel.excl; treatment based on
# share.excl itself.
ctrydata.f3 <- ctrydata[ctrydata$share.excl > 0, c("rebel.excl", "anocl", "democl")]
# Making binary explanatory variable: 1 when share.excl exceeds 0.5.
# (The previous expression had a stray "]>0.5" and did not parse.)
ctrydata.f3$card.polineq <- ifelse(ctrydata$share.excl[ctrydata$share.excl > 0] > 0.5, 1, 0)
match.f3 <- match.data(
  matchit(
    card.polineq ~ anocl + democl,
    data = na.omit(ctrydata.f3),
    method = "cem",
    drop = c("rebel.excl")
  )
)
# Group vector taken from match.f3 itself (previously match.f1.m1).
imbalance(
  group = match.f3$card.polineq,
  data = match.f3,
  drop = c("rebel.excl", "card.polineq", "distance", "weights", "subclass")
)
z.f3 <- zelig(rebel.excl ~ card.polineq + anocl + democl, data = match.f3, model = "relogit")
z.f3
# Standard errors for this model (previously computed from z.f1 by mistake).
sqrt(diag(abs(vcov(z.f3))))

# Looking at the effect of exclusion versus no exclusion in all countries (somewhat a tautology given the outcome variable - see discussion in paper)

ctrydata.f4 <- ctrydata[, c("rebel.all", "anocl", "democl")]
# Making binary explanatory variable: 1 when share.discrim or share.pwless
# is positive.
ctrydata.f4$card.polineq <- ifelse(ctrydata$share.discrim > 0 | ctrydata$share.pwless > 0, 1, 0)
match.f4 <- match.data(
  matchit(
    card.polineq ~ anocl + democl,
    data = na.omit(ctrydata.f4),
    method = "cem",
    drop = c("rebel.all")
  )
)
# Group vector taken from match.f4 itself; the previous code used match.f1,
# whose rows do not correspond to this matched sample.
imbalance(
  group = match.f4$card.polineq,
  data = match.f4,
  drop = c("rebel.all", "card.polineq", "distance", "weights", "subclass")
)
z.f4 <- zelig(rebel.all ~ card.polineq + anocl + democl, data = match.f4, model = "relogit")
z.f4
# Standard errors from the diagonal of the coefficient covariance matrix.
sqrt(diag(abs(vcov(z.f4))))

# All countries; treatment is share.excl above 0.5; outcome is any rebellion.
ctrydata.f5 <- ctrydata[, c("rebel.all", "anocl", "democl")]
# Making binary explanatory variable
ctrydata.f5$card.polineq <- ifelse(ctrydata$share.excl > 0.5, 1, 0)
match.f5 <- match.data(
  matchit(
    card.polineq ~ anocl + democl,
    data = na.omit(ctrydata.f5),
    method = "cem",
    drop = c("rebel.all")
  )
)
# Group vector taken from match.f5 itself (previously match.f1).
imbalance(
  group = match.f5$card.polineq,
  data = match.f5,
  drop = c("rebel.all", "card.polineq", "distance", "weights", "subclass")
)
z.f5 <- zelig(rebel.all ~ card.polineq + anocl + democl, data = match.f5, model = "relogit")
z.f5
# Standard errors for this model (previously computed from z.f1 by mistake).
sqrt(diag(abs(vcov(z.f5))))


# Effect of segregation (ethnic homogeneity)
# Countries with share.excl > 0; treatment is exavg.seg above 0.8.
ctrydata.f6 <- ctrydata[
  ctrydata$share.excl > 0,
  c("rebel.excl", "exavg.b", "exavg.seg", "exavg.zone", "exavg.avg.mnt")
]
ctrydata.f6$card.avgseg <- ifelse(ctrydata.f6$exavg.seg > 0.8, 1, 0)
match.f6 <- match.data(
  matchit(
    card.avgseg ~ exavg.b + exavg.zone + exavg.avg.mnt,
    data = na.omit(ctrydata.f6),
    method = "cem",
    drop = c("rebel.excl", "exavg.seg"),
    cutpoints = list(exavg.b = b.cut, exavg.zone = zone.cut)
  )
)
z.f6 <- zelig(
  rebel.excl ~ card.avgseg + exavg.b + exavg.zone + exavg.avg.mnt,
  data = match.f6,
  model = "relogit"
)
z.f6
# Standard errors from the diagonal of the coefficient covariance matrix.
sqrt(diag(abs(vcov(z.f6))))
#Quantities of interest: causal effects.
control <- 0
treated <- 1
x.control <- setx(z.f6, card.avgseg = control)
x.treated <- setx(z.f6, card.avgseg = treated)
# Simulate from the model the setx objects were built on. The previous code
# passed z.f1, which has a different outcome and no card.avgseg term.
s.out <- sim(z.f6, x = x.control, x1 = x.treated)
names(s.out)
plot(s.out)


# Looking at the effect of economic inequality of excluded groups relative to included
# Sample: countries with share.excl > 0 and exavg.seg > 0.7.
ctrydata2.f1 <- ctrydata2[
  ctrydata2$share.excl > 0 & ctrydata2$exavg.seg > 0.7,
  c("rebel.all", "democl", "anocl", "exavg.ratio.ei")
]
# Treatment: 1 when exavg.ratio.ei is below 1.
ctrydata2.f1$ratioei.card <- ifelse(ctrydata2.f1$exavg.ratio.ei < 1, 1, 0)
match.f1 <- match.data(
  matchit(
    ratioei.card ~ democl + anocl,
    data = na.omit(ctrydata2.f1),
    method = "cem",
    drop = c("rebel.all")
  )
)
imbalance(
  group = match.f1$ratioei.card,
  data = match.f1,
  drop = c("rebel.all", "ratioei.card", "distance", "weights", "subclass")
)
z2.f1 <- zelig(rebel.all ~ ratioei.card + democl + anocl, data = match.f1, model = "relogit")
z2.f1
# Standard errors from the diagonal of the coefficient covariance matrix.
sqrt(diag(abs(vcov(z2.f1))))

# Quantities of interest for this model. The treatment here is ratioei.card
# (the previous code set card.avgseg, which is not a term of z2.f1) and the
# simulation must use z2.f1 (previously z.f1).
x.control <- setx(z2.f1, ratioei.card = control)
x.treated <- setx(z2.f1, ratioei.card = treated)
s.out <- sim(z2.f1, x = x.control, x1 = x.treated)
names(s.out)
plot(s.out)

# Same sample restriction (share.excl > 0 and exavg.seg > 0.7), with the
# column `between` used directly as the treatment.
ctrydata2.f2 <- ctrydata2[
  ctrydata2$share.excl > 0 & ctrydata2$exavg.seg > 0.7,
  c("rebel.all", "democl", "anocl", "between")
]
match.f2 <- match.data(
  matchit(
    between ~ democl + anocl,
    data = na.omit(ctrydata2.f2),
    method = "cem",
    drop = "rebel.all"
  )
)
z2.f2 <- zelig(
  rebel.all ~ between + democl + anocl,
  data = match.f2,
  model = "relogit"
)
z2.f2
sqrt(diag(abs(vcov(z2.f2))))


# Countries with share.excl > 0; treatment is exavg.to.ctry below 1.
ctrydata2.f3 <- ctrydata2[
  ctrydata2$share.excl > 0,
  c("rebel.all", "anocl", "democl", "exavg.zone", "exavg.avg.mnt")
]
# Subset exavg.to.ctry with the same row mask as ctrydata2.f3. The previous
# code assigned the full-length column to the row-subsetted data frame, which
# fails (or misaligns rows) whenever any country is filtered out.
ctrydata2.f3$ratioctry.card <- ifelse(
  ctrydata2$exavg.to.ctry[ctrydata2$share.excl > 0] < 1,
  1, 0
)
match.f3 <- match.data(
  matchit(
    ratioctry.card ~ anocl + democl + exavg.zone + exavg.avg.mnt,
    data = na.omit(ctrydata2.f3),
    method = "cem",
    drop = c("rebel.all"),
    cutpoints = list(exavg.zone = zone.cut)
  )
)
imbalance(
  group = match.f3$ratioctry.card,
  data = match.f3,
  drop = c("rebel.all", "ratioctry.card", "distance", "weights", "subclass")
)
z2.f3 <- zelig(rebel.all ~ ratioctry.card + anocl + democl, data = match.f3, model = "relogit")
z2.f3
# Standard errors from the diagonal of the coefficient covariance matrix.
sqrt(diag(abs(vcov(z2.f3))))
